# eliminateBreakoffs.R

# Part of the replication archive for 
#
#   Bullock, John G., and Kelly Rader. 2021. "Response Options and the 
#   Measurement of Political Knowledge." Forthcoming in the British Journal 
#   of Political Science.

# On page 7 of our pre-analysis plan, we say that following an AAPOR 
# definition, we'll screen out all subjects who answered fewer than half of 
# our questions. AAPOR calls these subjects "break-offs."
#   It isn't straightforward to determine which subjects should be coded as 
# AAPOR break-offs. This is so mainly because an empty answer ("") to an 
# open-ended question may indicate that someone had stopped taking the 
# survey, that she skipped the question because she didn't know the answer, 
# that she skipped it out of boredom (or for some related reason), or that 
# she wasn't assigned to the open-ended condition at all. 
#   Classification of subjects who received closed-ended questions is much
# clearer. Skips in response to these questions are coded as -99, and people 
# who broke off the survey at or after a given closed-ended question are 
# coded as giving empty answers (""). 
#   We err on the side of caution by coding as break-offs only those 
# subjects who skipped more than half of the closed-ended questions in our 
# study. A more expansive coding of break-offs has no material effect on our
# results.
eliminateBreakoffs <- function (originalData, remove = TRUE) {
  # Flag, and optionally drop, subjects who skipped more than half of the
  # closed-ended questions listed in `skipCodes` below.
  #
  # Args:
  #   originalData: data frame with one row per subject. It must contain
  #                 every question column named in `skipCodes`.
  #   remove:       if TRUE (the default), rows flagged as break-offs are
  #                 dropped; if FALSE, every row is returned.
  #
  # Returns:
  #   `originalData` with an added logical column `breakOffs` (TRUE when the
  #   subject skipped more than half of the listed questions). When `remove`
  #   is TRUE, only rows with `breakOffs == FALSE` are returned.

  skipOrEmpty <- c('-99', '')
  emptyOnly   <- ''

  # One entry per question: the answer codes that count as "skipped".
  # The list is spelled out (rather than collected from local variable names)
  # so that the set of questions, and hence the "half" threshold, is explicit.
  skipCodes <- list(
    Q113  = skipOrEmpty,  # Pence
    Q94   = skipOrEmpty,  # Yellen
    Q95   = skipOrEmpty,  # Senate term
    Q5.1  = skipOrEmpty,  # procedure for choosing justices
    Q6.1  = skipOrEmpty,  # watch lawyers argue
    Q7.1  = skipOrEmpty,  # final say
    Q16.1 = skipOrEmpty,  # if justices split
    Q101  = skipOrEmpty,  # screener: salary
    Q21.1 = skipOrEmpty,  # Court power
    Q22.1 = skipOrEmpty,  # justice removal
    Q24.2 = skipOrEmpty,  # PID stem
    # For the next three items only an empty answer is counted as skipped;
    # '-99' is not treated as a skip code for them.
    Q25.2 = emptyOnly,    # year of birth
    Q25.3 = emptyOnly,    # race
    Q25.4 = emptyOnly,    # gender
    Q109  = skipOrEmpty,  # schooling
    Q112  = skipOrEmpty   # state of residence
  )

  # Fail early, and by name, if a question column is absent. `[[` is used
  # below because `$` on a data frame partially matches column names.
  missingCols <- setdiff(names(skipCodes), names(originalData))
  if (length(missingCols) > 0) {
    stop("eliminateBreakoffs: originalData is missing required column(s): ",
         paste(missingCols, collapse = ", "), call. = FALSE)
  }

  # `%in%` never returns NA, so a missing value is simply "not skipped" and
  # the row count below cannot be NA.
  skipIndicators <- lapply(names(skipCodes), function (question) {
    originalData[[question]] %in% skipCodes[[question]]
  })
  nSkipped <- rowSums(do.call(cbind, skipIndicators))

  # Break-off: strictly more than half of the listed questions were skipped.
  originalData$breakOffs <- nSkipped > length(skipCodes) / 2

  # RETURN THE DATASET
  if (remove) {
    originalData <- originalData[!originalData$breakOffs, ]
  }
  originalData

}